/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/13/2024

Description: This program generates all of the figures in our paper, except for 
Figure 1 (which simply describes the CPS process), Figure 4, and the right-hand side 
of Figure A2, both of which come from NCANDS data. 

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
clear
set more off
macro drop all
capture log close
set seed 02042023

*Set directories 
global cleandata 
global tmpdata 
global output 
global code 

**************************
**FIGURE 2
**************************
*Panel A: 
use "${cleandata}screener_placement_maltreatment_rates_se_qje.dta", clear 

foreach x in D_w D_b {
		cap drop `x'2
		replace `x' = 1-`x'
		gen `x'2 = `x'^2
}

		foreach r in b w {
			*inverse variance of the judge FE
			gen `r'_var=1/(`r'_se_fe_inv6m^2)

			*grid to plot fitted values
			sum D_`r'
			range `r'_points 0 `r(max)'

			*linear regression
			reg Y_`r' D_`r' [aw=`r'_var]
			gen `r'_fitted_1=_b[_cons]+`r'_points*_b[D_`r']
			sum `r'_fitted_1 if `r'_points==0
			local ate_`r'_lin=`r(mean)'
			
			*quadratic regression
			reg Y_`r' D_`r' D_`r'2 [aw=`r'_var]
			gen `r'_fitted_2=_b[_cons]+`r'_points*_b[D_`r'] + `r'_points^2*_b[D_`r'2]
			sum `r'_fitted_2 if `r'_points==0
			local ate_`r'_qex=`r(mean)'
			
			*local linear regression
			lpoly Y_`r' D_`r' [aw=`r'_var], k(gaus) bw(.04) degree(1) nograph at(`r'_points) gen(y`r' x`r')
			disp r(bwidth)	
			sum x`r' if y`r'==0
			local ate_`r'_llin=`r(mean)'
		}

		*Generate binscatter plots:
		binscatter Y_w D_w [aw=w_var], savedata(white) replace 
		binscatter Y_b D_b [aw=b_var], savedata(black) replace 
		gen counter = _n 

		*Save binscatter estimates to combine with extrapolations above	
		foreach x in white black {
		preserve 
		clear 
		insheet using "`x'.csv", clear
		gen counter = _n 
		save `x'.dta, replace 
		restore 
		}

		preserve 
		use white.dta, replace 
		merge 1:1 counter using black.dta 
		cap drop _merge 
		save binscatter.dta, replace 
		restore 

		cap drop _merge 
		merge 1:1 counter using binscatter.dta
		
cap erase "${code}black.dta"			
cap erase "${code}white.dta"
cap erase "${code}binscatter.dta"	
cap erase "${code}black.csv"
cap erase "${code}white.csv"	
cap erase "${code}black.do"
cap erase "${code}white.do"

*Plot results: binscatter plus extrapolations 
set scheme plotplain 
local color1 "sea"
local color2 "orange*.75"
scatter y_w d_w , mcolor(`color1') msize(medium) msymbol(smcircle_hollow) ///
				|| scatter y_b d_b, mcolor(`color2') msize(medium) msymbol(X)   ///
				|| line w_fitted_1 w_points, lcolor(`color1') lp(solid) lw(.4) ///	
				|| line b_fitted_1 b_points , lcolor(`color2') lp(solid) lw(.4) /// 
				|| line w_fitted_2 w_points, lcolor(`color1') lp(dash) lw(.4) ///
				|| line b_fitted_2 b_points , lcolor(`color2') lp(dash) lw(.4) ///
				|| line xw yw, lcolor(`color1') lp(shortdash) lw(.4) ///
				|| line xb yb, lcolor(`color2') lp(shortdash) lw(.4) ///
				ytitle("Subsequent Maltreatment, When Left at Home") ///
				xtitle("Placement Rate")  ///
				legend(order(1 "White Estimate" 2 "Black Estimate" 3 "White Linear Fit" 4 ///
				"Black Linear Fit" 5 "White Quadratic Fit" 6 "Black Quadratic Fit" 7 ///
				"White Local Linear Fit" 8 "Black Local Linear Fit"))  ///
				graphregion(color(white)) bgcolor(white) ///
				xlabel(0 "0" .05 ".05" .1 ".1") ///
				ylabel(.05 ".05" .1 ".1" .15 ".15" .2 ".2")
				graph export "${output}figure2a.pdf", replace 


*Panel B:
use "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", clear

foreach x in w_nofc b_nofc { 
	replace `x' = 1 - `x'
}
	rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_nofc b_se_nofc) (w_se_fe_nofc b_se_fe_nofc)
	
	gen D_w2 = D_w*D_w 
	gen D_b2 = D_b*D_b
	
	sum bshare 
	local bshare = r(mean)

		foreach r in b w {
			*inverse variance of the judge FE
			gen `r'_var=1/(`r'_se_inv6m^2)

			*grid to plot fitted values
			sum D_`r'
			range `r'_points `r(min)' .15

			*linear regression
			reg Y_`r' D_`r' [aw=`r'_var]
			gen `r'_fitted_1=_b[_cons]+`r'_points*_b[D_`r']
			sum `r'_fitted_1 if `r'_points==0
			local ate_`r'_lin=`r(mean)'
			
			*quadratic regression
			reg Y_`r' D_`r' D_`r'2 [aw=`r'_var]
			gen `r'_fitted_2=_b[_cons]+`r'_points*_b[D_`r'] + `r'_points^2*_b[D_`r'2]
			sum `r'_fitted_2 if `r'_points==0
			local ate_`r'_qex=`r(mean)'
			
			*local linear regression
			lpoly Y_`r' D_`r' [aw=`r'_var], bw(0.1) k(gaus) degree(1) nograph at(`r'_points) gen(y`r' x`r')
			disp r(bwidth)	
			sum x`r' if y`r'==0
			local ate_`r'_llin=`r(mean)'
		}
	
		*Generate binscatter plots:
		binscatter Y_w D_w [aw=w_var], savedata(white) replace 
		binscatter Y_b D_b [aw=b_var], savedata(black) replace 
		gen counter = _n 

		*Save binscatter estimates and combine with extrapolations above	
		foreach x in white black {
		preserve 
		clear 
		insheet using "`x'.csv", clear
		gen counter = _n 
		save `x'.dta, replace 
		restore 
			}

		preserve 
		use white.dta, replace 
		merge 1:1 counter using black.dta 
		drop _merge 
		save binscatter.dta, replace 
		restore 
 
		cap drop _merge 
		merge 1:1 counter using binscatter.dta
		
cap erase "${code}black.dta"			
cap erase "${code}white.dta"
cap erase "${code}binscatter.dta"	
cap erase "${code}black.csv"
cap erase "${code}white.csv"	
cap erase "${code}black.do"
cap erase "${code}white.do"

*Plot results: binscatter plus extrapolations 
local color1 "sea"
local color2 "orange*.75"
scatter y_w d_w , mcolor(`color1') msize(medium) msymbol(smcircle_hollow) ///
				|| scatter y_b d_b, mcolor(`color2') msize(medium) msymbol(X)   ///
				|| line w_fitted_1 w_points, lcolor(`color1') lp(solid) lw(.4) ///	
				|| line b_fitted_1 b_points , lcolor(`color2') lp(solid) lw(.4) /// 
				|| line w_fitted_2 w_points, lcolor(`color1') lp(dash) lw(.4) ///
				|| line b_fitted_2 b_points , lcolor(`color2') lp(dash) lw(.4) ///
				|| line xw yw, lcolor(`color1') lp(shortdash) lw(.4) ///
				|| line xb yb, lcolor(`color2') lp(shortdash) lw(.4) ///
				ytitle("Subsequent Maltreatment, When Left at Home") ///
				xtitle("Placement Rate")  ///
				legend(order(1 "White Estimate" 2 "Black Estimate" 3 "White Linear Fit" 4 ///
				"Black Linear Fit" 5 "White Quadratic Fit" 6 "Black Quadratic Fit" 7 ///
				"White Local Linear Fit" 8 "Black Local Linear Fit"))  ///
				graphregion(color(white)) bgcolor(white) ///
				xlabel(0 "0" .05 ".05" .1 ".1" .15 ".15") ///
				ylabel(.05 ".05" .1 ".1" .15 ".15" .2 ".2")
				graph export "${output}figure2b.pdf", replace				


**************************
**FIGURE 3
**************************
*Panel A: 
use "${cleandata}screener_screening_maltreatment_rates_qje.dta", clear 	
rename (b_screened w_screened b_inv6m w_inv6m) (D_b D_w Y_b Y_w)
foreach x in D_b D_w {
	replace `x' = 1-`x'
	gen `x'2 = `x'*`x'
}	
	gen bshare = count_black / count_inv 
	sum bshare [aw=count_inv]
	local bshare = `r(mean)'

preserve
use "${cleandata}screener_placement_maltreatment_rates_se_qje.dta", clear 	
	foreach r in b w {
			*inverse variance of the investigator FE
			gen `r'_var=1/(`r'_se_fe_inv6m^2)

			*grid to plot fitted values
			sum D_`r'
			range `r'_points `r(min)' 1 100

			*linear regression
			reg Y_`r' D_`r' [aw=`r'_var]
			gen `r'_fitted_1=_b[_cons]+`r'_points*_b[D_`r']
			sum `r'_fitted_1 if `r'_points==1
			gen lin_ate_`r'=`r(mean)'
			local lin_ate_`r'=`r(mean)'
			
			*quadratic regression
			reg Y_`r' D_`r' D_`r'2 [aw=`r'_var]
			gen `r'_fitted_2=_b[_cons]+`r'_points*_b[D_`r'] + `r'_points^2*_b[D_`r'2]
			sum `r'_fitted_2 if `r'_points==1
			gen qex_ate_`r'=`r(mean)'
			local qex_ate_`r'=`r(mean)'			
			
			*local linear regression
			lpoly Y_`r' D_`r' [aw=`r'_var], bw(0.1) k(gaus) degree(1) nograph at(`r'_points) gen(y`r' x`r')
			disp r(bwidth)	
			sum x`r' if y`r'==1
			gen llin_ate_`r'=`r(mean)'
			local llin_ate_`r'=`r(mean)'
	}
restore 	
	
	*simulate and plot mean UD estimates, over ATE range
	matrix hyp=J(11000,3,.)
	local row=1
	forval ate_b=0.125(0.001)0.151 {
		forval ate_w=0.137(0.001)0.154 {
			local ate=`ate_b'*bshare+`ate_w'*(1-bshare)
			gen hyp_adj_gap=((D_w*Y_w)/`ate_w' ///
				-(D_b*Y_b)/`ate_b')*`ate' ///
			   +((D_w*(1-Y_w))/(1-`ate_w') ///
				-(D_b*(1-Y_b))/(1-`ate_b'))*(1-`ate')
			qui summ hyp_adj_gap [aw=count_inv]
			matrix hyp[`row',3]=r(mean)
			matrix hyp[`row',1]=`ate_b'
			matrix hyp[`row',2]=`ate_w'
			local ++row
			drop hyp_adj_gap
		}
	}
	clear
	svmat hyp
 	drop if hyp1 < 0.1 | hyp1 > 0.17
	set scheme plotplain 
	twoway contour hyp3 hyp1 hyp2, ccuts(0.0495 0.050 0.0505 0.051) ztitle("Average Screener UD") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
				ccolors("252 199 146" "243 174 97" "235 150 48" "227 126 0") ///
									  || scatteri `lin_ate_b' 0.137 `lin_ate_b' 0.154, c(l) lc(black) lp(solid) m(i) || scatteri 0.125 `lin_ate_w' 0.151 `lin_ate_w', ///
		c(l) lc(black) lp(solid) m(i) || scatteri `quad_ate_b' 0.137 `quad_ate_b' 0.154, c(l) lc(black) lp(dash) m(i) || scatteri 0.125 `quad_ate_w' 0.151 `quad_ate_w', ///
		c(l) lc(black) lp(dash) m(i)  || scatteri `llin_ate_b' 0.137 `llin_ate_b' 0.154, c(l) lc(black) lp(shortdash) m(i) || scatteri 0.125 `llin_ate_w' 0.151 `llin_ate_w', ///
		c(l) lc(black) lp(shortdash) m(i) legend(order(1 3 5) label(1 "Linear") label(3 "Quadratic") label(5 "Local Linear") cols(3)) scheme(s1color)
				graph export "${output}figure3a.pdf", replace 


*Panel B: 
	use "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", clear
	rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_nofc b_se_nofc) (w_se_fe_nofc b_se_fe_nofc)
	
	*Define variables needed for quadratic regression
	gen D_w2 = D_w*D_w 
	gen D_b2 = D_b*D_b
	
	sum bshare 
	local bshare = r(mean)

	*compute ATE estimates
	foreach r in b w {
		gen `r'_var=1/(`r'_se_inv6m^2)
		qui reg Y_`r' D_`r' [aw=`r'_var]
		local lin_ate_`r'=_b[_cons]+_b[D_`r']
		qui reg Y_`r' D_`r' D_`r'2 [aw=`r'_var]
		local quad_ate_`r'=_b[_cons]+_b[D_`r']+_b[D_`r'2]
		sum D_`r'
		range `r'_points `r(min)' 1 100
		lpoly Y_`r' D_`r' [aw=`r'_var], k(gaus) deg(1) bw(0.1) nograph at(`r'_points) gen(y`r' x`r')
		summ x`r' if y`r'==1
		local llin_ate_`r'=r(mean)
		disp `lin_ate_`r''
		display `llin_ate_`r''
		disp `quad_ate_`r''
	}	

	*simulate and plot mean UD estimates, over ATE range
	matrix hyp=J(11000,3,.)
	local row=1
	forval ate_b=0.139(0.001)0.181 {
		forval ate_w=0.164(0.001)0.194 {
			local ate=`ate_b'*bshare+`ate_w'*(1-bshare)
			gen hyp_adj_gap=((D_w*Y_w)/`ate_w' ///
				-(D_b*Y_b)/`ate_b')*`ate' ///
			   +((D_w*(1-Y_w))/(1-`ate_w') ///
				-(D_b*(1-Y_b))/(1-`ate_b'))*(1-`ate')
			qui summ hyp_adj_gap [aw=count_inv]
			matrix hyp[`row',3]=r(mean)
			matrix hyp[`row',1]=`ate_b'
			matrix hyp[`row',2]=`ate_w'
			local ++row
			drop hyp_adj_gap
		}
	}
	clear
	svmat hyp
 	drop if hyp1 < 0.1| hyp1 > 0.25
	set scheme plotplain 
	twoway contour hyp3 hyp1 hyp2, ccuts(0.012 0.015 0.018 0.021) ztitle("Average Investigator UD") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
				ccolors("252 199 146" "243 174 97" "235 150 48" "227 126 0")    /// 
		|| scatteri `lin_ate_b' 0.164 `lin_ate_b' 0.194, c(l) lc(black) lp(solid) m(i) || scatteri 0.139 `lin_ate_w' 0.181 `lin_ate_w', ///
		c(l) lc(black) lp(solid) m(i) || scatteri `quad_ate_b' 0.164 `quad_ate_b' 0.194, c(l) lc(black) lp(dash) m(i) || scatteri 0.139 `quad_ate_w' 0.181 `quad_ate_w', ///
		c(l) lc(black) lp(dash) m(i) || scatteri `llin_ate_b' 0.164 `llin_ate_b' 0.194, c(l) lc(black) lp(shortdash) m(i) || scatteri 0.139 `llin_ate_w' 0.181 `llin_ate_w', ///
		c(l) lc(black) lp(shortdash) m(i) legend(order(1 3 5) label(1 "Linear") label(3 "Quadratic") label(5 "Local Linear") cols(3)) scheme(s1color)
	graph export "${output}figure3b.pdf", replace
	
	
	
**************************
**FIGURE A1
**************************
use "${tmpdata}inv_fc_maltreatment_rates_qje.dta", clear 
merge 1:1 worker_id using "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", keepus(w_nofc b_nofc) keep(1 3)
	
	foreach x in w_nofc b_nofc {
		replace `x' = 1 - `x'
	}
rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	
		foreach r in b w {
			*inverse variance of the judge FE
			gen `r'_var=1/(`r'_se_inv6m^2)

			*grid to plot fitted values
			sum D_`r'
			range `r'_points 0 .2

			*linear regression
			reg Y_`r' D_`r' [aw=`r'_var], robust 
			gen `r'_fitted_1=_b[_cons]+`r'_points*_b[D_`r']
			sum `r'_fitted_1 if `r'_points==0
			local ate_`r'_lin=`r(mean)'
		}
		
		*Generate binscatter plots:
		binscatter Y_w D_w [aw=w_var], savedata(white) replace 
		binscatter Y_b D_b [aw=b_var], savedata(black) replace 

		*Save binscatter estimates as individual datasets and combine with extrapolations above	
		gen counter = _n 
		foreach x in white black {
		preserve 
		clear 
		insheet using "`x'.csv", clear
		gen counter = _n 
		save `x'.dta, replace 
		restore 
			}

		preserve 
		use white.dta, replace 
		merge 1:1 counter using black.dta 
		drop _merge 
		save binscatter.dta, replace 
		restore 

		cap drop _merge 
		merge 1:1 counter using binscatter.dta
			
		cap erase "${code}black.dta"			
		cap erase "${code}white.dta"
		cap erase "${code}binscatter.dta"	
		cap erase "${code}black.csv"
		cap erase "${code}white.csv"	
		cap erase "${code}black.do"
		cap erase "${code}white.do"

*Plot results: binscatter plus extrapolations 
local color1 "sea"
local color2 "orange*.75"
scatter y_w d_w , mcolor(`color1') msize(medium) msymbol(smcircle_hollow) ///
				|| scatter y_b d_b, mcolor(`color2') msize(medium) msymbol(X)   ///
				|| line w_fitted_1 w_points, lcolor(`color1') lp(solid) lw(.4) ///	
				|| line b_fitted_1 b_points , lcolor(`color2') lp(solid) lw(.4) /// 
				graphregion(color(white)) bgcolor(white) ///
				ytitle("Subsequent Maltreatment, When Placed in Foster Care") ///
				xtitle("Placement Rate")  ///
				legend(order(1 "White Estimate" 2 "Black Estimate" 3 "White Linear Fit" 4 ///
				"Black Linear Fit"))  ///
				xlabel(0 "0" .05 ".05" .1 ".1" .15 ".15" .2 ".2") ///
				ylabel(0 "0" .05 ".05" .1 ".1" .15 ".15" .2 ".2")	
				graph export "${output}figureA1.pdf", replace	
	
**************************
**FIGURE A2 (LEFT-HAND SIDE)
**************************
	*Panel A:
	use "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", clear
	rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_nofc b_se_nofc) (w_se_fe_nofc b_se_fe_nofc)

	sum bshare 
	local bshare = r(mean)

	*simulate and plot mean UD estimates, over ATE range
	matrix hyp=J(11000,3,.)
	local row=1
	forval ate_b=0.139(0.001)0.181 {
		forval ate_w=0.164(0.001)0.194 {
			local ate=`ate_b'*bshare+`ate_w'*(1-bshare)
			gen hyp_adj_gap=((D_w*Y_w)/`ate_w' ///
				-(D_b*Y_b)/`ate_b')*`ate' ///
			   +((D_w*(1-Y_w))/(1-`ate_w') ///
				-(D_b*(1-Y_b))/(1-`ate_b'))*(1-`ate')
			qui summ hyp_adj_gap [aw=count_inv]
			matrix hyp[`row',3]=r(mean)
			matrix hyp[`row',1]=`ate_b'
			matrix hyp[`row',2]=`ate_w'
			local ++row
			drop hyp_adj_gap
		}
	}
	clear
	svmat hyp
 	drop if hyp1 < 0.1| hyp1 > 0.25
	set scheme plotplain 
	twoway contour hyp3 hyp1 hyp2, ccuts(0.012 0.015 0.018 0.021) ztitle("Average Investigator UD") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
				ccolors("252 199 146" "243 174 97" "235 150 48" "227 126 0") 
	graph export "${output}figureA2a.pdf", replace
	

	*Panel B:
	use "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", clear
	rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_nofc b_se_nofc) (w_se_fe_nofc b_se_fe_nofc)
	
	*simulate and plot mean UD estimates, over ATE range
	matrix hyp=J(11000,3,.)
	local row=1
	forval ate_b=0.139(0.001)0.181 {
		forval ate_w=0.164(0.001)0.194 {
			local ate=`ate_b'*bshare+`ate_w'*(1-bshare)
			gen hyp_adj_gap=(D_w*(1-Y_w))/(1-`ate_w') ///
				-(D_b*(1-Y_b))/(1-`ate_b')
			qui summ hyp_adj_gap [aw=count_inv]
			matrix hyp[`row',3]=r(mean)
			matrix hyp[`row',1]=`ate_b'
			matrix hyp[`row',2]=`ate_w'
			local ++row
			drop hyp_adj_gap
		}
	}
	clear
	svmat hyp
 	drop if hyp1 < 0.1| hyp1 > 0.25
	set scheme plotplain 
	twoway contour hyp3 hyp1 hyp2, ccuts(-.02 0 .02 .04) ztitle("Average Investigator UD, Y*=0") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
		ccolors("42 157 244" "208 239 255" "250 245 145" "255 195 52" "dkorange")  
	graph export "${output}figureA2b.pdf", replace
	

*Panel C:
	use "${cleandata}inv_adjusted_rates_se_omitted_qje.dta", clear
	rename (w_nofc b_nofc w_inv6m b_inv6m) (D_w D_b Y_w Y_b)
	rename (w_se_nofc b_se_nofc) (w_se_fe_nofc b_se_fe_nofc)
	
	*simulate and plot mean UD estimates, over ATE range
	matrix hyp=J(11000,3,.)
	local row=1
	forval ate_b=0.139(0.001)0.181 {
		forval ate_w=0.164(0.001)0.194 {
			local ate=`ate_b'*bshare+`ate_w'*(1-bshare)
			gen hyp_adj_gap=((D_w*Y_w)/`ate_w' ///
				-(D_b*Y_b)/`ate_b')
			qui summ hyp_adj_gap [aw=count_inv]
			matrix hyp[`row',3]=r(mean)
			matrix hyp[`row',1]=`ate_b'
			matrix hyp[`row',2]=`ate_w'
			local ++row
			drop hyp_adj_gap
		}
	}
	clear
	svmat hyp
 	drop if hyp1 < 0.1| hyp1 > 0.25
	set scheme plotplain 
	twoway contour hyp3 hyp1 hyp2, ccuts(-.2 -.1 0 .1 .2) ztitle("Average Investigator UD, Y*=1") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
		ccolors("ltblue" "42 157 244" "208 239 255" "250 245 145" "255 195 52" "dkorange")  
	graph export "${output}figureA2c.pdf", replace

